#!/usr/bin/env python
# -*- coding: utf-8 -*-

'''
Created on 2011-11-9

@author: zhongfeng
'''

from egou.egoupageparser import *
from crawlerhttp import crawle
from pageparser import *
from dbproc.catagoryproc import *


def getContentFromUrlSum(urlsum):
    '''
    Crawl ``urlsum`` and return the ``content`` of the first successful result.

    ``crawle`` is invoked repeatedly until the result it returns has
    ``code == 200``.  There is no retry limit and no delay between attempts,
    so this call does not return while the crawl keeps reporting any other
    code.
    '''
    response = crawle(urlsum)
    while response.code != 200:
        # Any non-200 result is discarded and the same target is crawled again.
        response = crawle(urlsum)
    return response.content

def getAllSort1(content = None):
    '''
    Return the sub url summaries parsed from the egou root page.

    content -- page content to parse with ``EGouSortParser``.  When omitted
               (``None``) the page is fetched via
               ``getContentFromUrlSum(egouRoot)``.

    Returns whatever ``EGouSortParser(content, egouRoot).parserSubUrlSums()``
    returns.

    The parse step used to be nested inside the ``content is None`` branch,
    which made the function return ``None`` whenever a caller supplied
    ``content``; it now runs in both cases.
    '''
    if content is None:
        content = getContentFromUrlSum(egouRoot)
    # Disabled experiment kept for reference: restricting parsing to a few
    # hand-picked level-1 categories.
    #telCat = ObuyUrlSummary(url='http://www.egou.com/browse07.01/',catagoryLevel=1,parentPath=[egouRoot])
    #homeCat = ObuyUrlSummary(url='http://www.egou.com/browse07.02/',catagoryLevel=1,parentPath=[egouRoot])
    #computeCat = ObuyUrlSummary(url='http://www.egou.com/browse07.03/',catagoryLevel=1,parentPath=[egouRoot])
    #include=(telCat,homeCat,computeCat)
    parser = EGouSortParser(content, egouRoot)
    return parser.parserSubUrlSums()

def getAllSort3():
    '''
    Collect the url summaries found two parsing levels below ``getAllSort1()``.

    Each entry from ``getAllSort1()`` is crawled and parsed with
    ``EGouSort1PageParser``; each entry that parser yields is crawled and
    parsed with ``EGouSort2PageParser``.  The results of every second-stage
    parse are concatenated, in traversal order, into one list.
    '''
    collected = []
    for firstLevel in getAllSort1():
        firstPage = getContentFromUrlSum(firstLevel)
        #telCat = ObuyUrlSummary(url='http://www.egou.com/browse07.01.01/',catagoryLevel=2)
        secondLevels = EGouSort1PageParser(firstPage, firstLevel).parserSubUrlSums()
        for secondLevel in secondLevels:
            secondPage = getContentFromUrlSum(secondLevel)
            thirdLevels = EGouSort2PageParser(secondPage, secondLevel).parserSubUrlSums()
            collected.extend(thirdLevels)
    return collected

if __name__ == '__main__':
    # Script entry point: crawl the whole category tree, then hand the
    # collected url summaries to the category helpers.
    import os
    from itertools import chain
    # NOTE(review): 'c:t.log' has no separator after the drive letter --
    # confirm the intended location.  The file is opened (and truncated) but
    # nothing is written to it while the dump below stays disabled.
    with open('c:t.log','w') as output:
        allCats = getAllSort3()
        calEveryLevelCatNum(allCats)
        createSiteCat(allCats,u'55bigo')
        # Disabled debug dump of each category's parent chain (uses chain/os):
        #for sort3 in getAllSort3():
            #output.write( '|'.join([cat.name.encode('gb18030') for cat in chain(sort3.parentPath,(sort3.parent,))]))
            #output.write(os.linesep)

        